Notebook Summary

This script plots individual antibody trajectories in the Haiti cohort over successive measurement visits and also by age. Children were measured up to 9 times, but the majority were measured 5 times, so figures by measurement round include up to 6 visits.

Script preamble

## here() starts at /Users/benarnold/enterics-seroepi
## [1] "/Users/benarnold/enterics-seroepi"
## ── Attaching packages ────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.1.1       ✔ forcats 0.3.0
## ── Conflicts ───────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: iterators
## Loading required package: parallel

Identify incident changes

Among children, identify those that changed status between enrollment and follow-up. Those who changed from negative to positive are seroconverters (seroi below), and those who changed from positive to negative are seroreverters (seror below).

#-----------------------------
# identify seropositive measures
# hierarchy of information for 
# cutoffs:
# 1. ROC
# 2. mixture model based
# 3. estimated among presumed unexposed
#
# store the cutoff value used
# for figures
#-----------------------------
dl <- dl %>%
  mutate(seropos=ifelse(!is.na(posroc),posroc,posmix),
         serocut=ifelse(!is.na(posroc),roccut,mixcut),
         serocut_desc=ifelse(!is.na(posroc),"ROC","Mixture Model")) %>%
  mutate(seropos=ifelse(!is.na(seropos),seropos,posunex),
         serocut_desc=ifelse(!is.na(serocut),serocut_desc,"Unexp dist"),
         serocut=ifelse(!is.na(serocut),serocut,unexpcut)) 


#-----------------------------
# identify incident 
# seroconversions and reversions
#-----------------------------

# group the data by child and
# use lags to identify
# time in years between measurements,
# sero-conversions + sero-reversions 
# between measurements
# set the first measurement to 
# missing for the incidence indicators
# (using the is.na(age_diff) to identify them)
dl2 <- dl %>%
  group_by(antigen,id) %>% 
  arrange(antigen,id,sampleid) %>%
  mutate(age_min  = min(age),
         age_diff = age - lag(age),
         
         logmfi_lag  = lag(logmfi),
         logmfi_lead = lead(logmfi),
         logmfi_dlag  = logmfi - logmfi_lag,
         logmfi_dlead = logmfi_lead - logmfi,
         
         # incident seroconversions and reversions
         # including cumulative numbers
         seropos_lag  = lag(seropos),
         seroi = ifelse(seropos==1 & seropos_lag==0,1,0),
         seroi = ifelse(is.na(age_diff),NA,seroi),
         seroin = cumsum(ifelse(is.na(seroi),0,seroi)),
         seroin = ifelse(seroi==1,seroin,0),
         seror = ifelse(seropos==0 & seropos_lag==1,1,0),
         seror = ifelse(is.na(age_diff),NA,seror),
         serorn = cumsum(ifelse(is.na(seror),0,seror)),
         serorn = ifelse(seror==1,serorn,0)
         ) %>%
  select(
         id,sampleid,
         starts_with("age"),
         pathogen,antigen,antigenf,
         mfi,logmfi,logmfi_dlag,logmfi_dlead,
         roccut,mixcut,serocut,
         posroc,posmix,seropos,seroi,seroin,seror,serorn,
         -logmfi_lag,-logmfi_lead,-seropos_lag)

# incident seroconversions based on a 4-fold increase in MFI
# with a second measure above the seropositivity cutoff
# incident seroreversions based on a 4-fold decrease in MFI
# with the first measure above the seropositivity cutoff
dl2 <- dl2 %>%
  mutate(seroi4fold = ifelse(logmfi_dlag>log10(4) & logmfi>serocut,1,0),
         seroin4fold = cumsum(ifelse(is.na(seroi4fold),0,seroi4fold)),
         seroin4fold = ifelse(seroi4fold==1,seroin4fold,0),
         
         seror4fold=ifelse(logmfi_dlag< -log10(4) & lag(logmfi)>serocut,1,0),
         serorn4fold = cumsum(ifelse(is.na(seror4fold),0,seror4fold)),
         serorn4fold = ifelse(seror4fold==1,serorn4fold,0)
         )

Create labels for different seroconversion patterns

dl2pp <- dl2 %>%
  arrange(antigen,id,age) %>%
  group_by(antigen,id) %>%
  mutate(measnum=row_number()) %>%
  mutate(
    logmfi_chng = ifelse(logmfi_dlag>0,"Increasing","Decreasing"),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead>0,"Increasing",logmfi_chng),
    logmfi_chng = ifelse(is.na(logmfi_chng) & logmfi_dlead<0,"Decreasing",logmfi_chng),
    logmfi_chng = factor(logmfi_chng,levels=c("Increasing","Decreasing")),
    
    sero_chng = ifelse(lead(seroi4fold)==1,">4-fold increase to above cutoff","<4-fold change"),
    sero_chng = ifelse(lead(seror4fold)==1,">4-fold decrease from above cutoff",sero_chng),
    sero_chng = ifelse(is.na(sero_chng),lead(sero_chng),sero_chng),
    sero_chng = ifelse(is.na(sero_chng),lag(sero_chng),sero_chng),
    sero_chng = factor(sero_chng,levels=c(">4-fold increase to above cutoff",">4-fold decrease from above cutoff","<4-fold change")),
    
    sero_comp = ifelse(lead(seroi)==1 & lead(seroi4fold==1),">4-fold increase, across cutoff","<4-fold change"),
    sero_comp = ifelse(lead(seroi)==0 & lead(seroi4fold==1),">4-fold increase, above cutoff",sero_comp),
    sero_comp = ifelse(lead(seror4fold)==1,">4-fold decrease",sero_comp),
    sero_comp = factor(sero_comp,levels=c(">4-fold increase, across cutoff",">4-fold increase, above cutoff",">4-fold decrease","<4-fold change")),
    
    everseroi = max(seroi,na.rm=TRUE)
    
  ) %>%
  mutate(antigenf=factor(antigenf,levels=mbalabs)) # re-level the factor to put Salmonella on same row

# custom log labels
log10labs <- c( 
  expression(10^0),
  expression(10^1),
  expression(10^2),
  expression(10^3),
  expression(10^4)
)

Longitudinal child trajectories

## 
##   1   2   3   4   5   6   7   8   9 
## 142 142 140 131 111  66  29   9   1
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00274  2.54097  4.30000  4.41586  5.96875 11.54167

Figure 4 - supplement 2

Estimate prevalence of each class by measurement number

Method agreement for identifying seroconversion

Compare the agreement in the identification of seroconversions based on two approaches: crossing the seropositivity threshold, and a 4-fold increase in MFI. Compare the approaches separately for the primary boosting event and subsequent boosting

Compare classification of incident seroconversions

Classification agreement of seroconversion by based on seropositivity cutoffs and a 4-fold increase in MFI
Antigen N Seropos 0 4-fold incr. 0 Seropos 1 4-fold incr. 0 Seropos 0 4-fold incr. 1 Seropos 1 4-fold incr. 1 Agreement Kappa
Giardia VSP-3 629 486 32 30 81 0.901 0.663
Giardia VSP-5 629 482 27 37 83 0.898 0.660
Cryptosporidium Cp17 629 441 12 106 70 0.812 0.444
Cryptosporidium Cp23 629 455 10 77 87 0.862 0.587
E. histolytica LecA 629 499 11 33 86 0.930 0.755
Salmonella LPS group B 629 479 19 66 65 0.865 0.528
Salmonella LPS group D 629 504 27 36 62 0.900 0.604
ETEC LT B subunit 629 596 0 22 11 0.965 0.487
Norovirus GI.4 629 502 11 47 69 0.908 0.652
Norovirus GII.4.NO 629 507 13 55 54 0.892 0.555

Distribution of change in MFI

Distribution of changes in MFI by seroconversion status

## Warning: Factor `serostat4fold` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).
## Warning: Removed 624 rows containing missing values (geom_point).

Distribution of changes in MFI by age category

## Warning: Removed 1420 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1420 rows containing missing values (geom_point).

Session Info

## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doParallel_1.0.11     iterators_1.0.9       foreach_1.4.4        
##  [4] ROCR_1.0-7            gplots_3.0.1          kableExtra_0.8.0.0001
##  [7] forcats_0.3.0         stringr_1.4.0         dplyr_0.8.0.1        
## [10] purrr_0.3.2           readr_1.1.1           tidyr_0.8.3          
## [13] tibble_2.1.1          ggplot2_3.1.1         tidyverse_1.2.1      
## [16] here_0.1             
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         lubridate_1.7.4    lattice_0.20-38   
##  [4] gtools_3.5.0       assertthat_0.2.1   rprojroot_1.3-2   
##  [7] digest_0.6.18      psych_1.8.4        R6_2.4.0          
## [10] cellranger_1.1.0   plyr_1.8.4         backports_1.1.4   
## [13] evaluate_0.13      highr_0.8          httr_1.4.0        
## [16] pillar_1.4.0       rlang_0.3.4        lazyeval_0.2.2    
## [19] readxl_1.1.0       rstudioapi_0.9.0   gdata_2.18.0      
## [22] rmarkdown_1.12     foreign_0.8-71     munsell_0.5.0     
## [25] broom_0.4.4        compiler_3.5.3     modelr_0.1.2      
## [28] xfun_0.6           pkgconfig_2.0.2    mnormt_1.5-5      
## [31] htmltools_0.3.6    tidyselect_0.2.5   codetools_0.2-16  
## [34] viridisLite_0.3.0  crayon_1.3.4       withr_2.1.2       
## [37] bitops_1.0-6       grid_3.5.3         nlme_3.1-137      
## [40] jsonlite_1.6       gtable_0.3.0       magrittr_1.5      
## [43] scales_1.0.0       KernSmooth_2.23-15 cli_1.1.0         
## [46] stringi_1.4.3      reshape2_1.4.3     xml2_1.2.0        
## [49] tools_3.5.3        glue_1.3.1         hms_0.4.2         
## [52] yaml_2.2.0         colorspace_1.3-2   caTools_1.17.1    
## [55] rvest_0.3.2        knitr_1.22         haven_2.1.0